library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(phyloseq)
library(ggplot2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library(dplyr)

Ordination

theme_set(theme_bw())
data("GlobalPatterns")
GP = GlobalPatterns
wh0 = genefilter_sample(GP, filterfun_sample(function(x) x > 5), A=0.5*nsamples(GP))
GP1 = prune_taxa(wh0, GP)
GP1 = transform_sample_counts(GP1, function(x) 1E6 * x/sum(x))
phylum.sum = tapply(taxa_sums(GP1), tax_table(GP1)[, "Phylum"], sum, na.rm=TRUE)
top5phyla = names(sort(phylum.sum, TRUE))[1:5]
GP1 = prune_taxa((tax_table(GP1)[, "Phylum"] %in% top5phyla), GP1)
human = get_variable(GP1, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue")
sample_data(GP1)$human <- factor(human)
GP.ord <- ordinate(GP1, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1333468 
## Run 1 stress 0.1486044 
## Run 2 stress 0.1333468 
## ... New best solution
## ... Procrustes: rmse 4.177022e-06  max resid 1.243326e-05 
## ... Similar to previous best
## Run 3 stress 0.1688708 
## Run 4 stress 0.1568955 
## Run 5 stress 0.1333468 
## ... Procrustes: rmse 1.987661e-06  max resid 4.829373e-06 
## ... Similar to previous best
## Run 6 stress 0.1612537 
## Run 7 stress 0.1450956 
## Run 8 stress 0.1530178 
## Run 9 stress 0.1545803 
## Run 10 stress 0.1719321 
## Run 11 stress 0.1449079 
## Run 12 stress 0.1736162 
## Run 13 stress 0.1542899 
## Run 14 stress 0.1664123 
## Run 15 stress 0.1637665 
## Run 16 stress 0.1333468 
## ... New best solution
## ... Procrustes: rmse 1.055481e-06  max resid 2.732988e-06 
## ... Similar to previous best
## Run 17 stress 0.1333468 
## ... Procrustes: rmse 4.298969e-06  max resid 1.271583e-05 
## ... Similar to previous best
## Run 18 stress 0.1333468 
## ... Procrustes: rmse 1.988254e-06  max resid 4.151417e-06 
## ... Similar to previous best
## Run 19 stress 0.1812212 
## Run 20 stress 0.174593 
## *** Solution reached
p1 = plot_ordination(GP1, GP.ord, type="taxa", color="Phylum", title="taxa")
print(p1)

p1 + facet_wrap(~Phylum, 3)

p2 = plot_ordination(GP1, GP.ord, type="samples", color="SampleType", shape="human")
p2 + geom_polygon(aes(fill=SampleType)) + geom_point(size=5) + ggtitle("samples")

p3 = plot_ordination(GP1, GP.ord, type="biplot", color="SampleType", shape="Phylum", title="biplot")

GP1.shape.names = get_taxa_unique(GP1, "Phylum")
GP1.shape <- 15:(15 + length(GP1.shape.names) - 1)
names(GP1.shape) <- GP1.shape.names
GP1.shape["samples"] <- 16
p3 + scale_shape_manual(values=GP1.shape)

p4 = plot_ordination(GP1, GP.ord, type="split", color="Phylum", shape="human", label="SampleType", title="split") 
p4

gg_color_hue <- function(n){
    hues = seq(15, 375, length=n+1)
    hcl(h=hues, l=65, c=100)[1:n]
}
color.names <- levels(p4$data$Phylum)
p4cols <- gg_color_hue(length(color.names))
names(p4cols) <- color.names
p4cols["samples"] <- "black"
p4 + scale_color_manual(values=p4cols)

dist = "bray"
ord_meths = c("DCA", "CCA", "RDA", "DPCoA", "NMDS", "MDS", "PCoA")
plist = llply(as.list(ord_meths), function(i, physeq, dist){
        ordi = ordinate(physeq, method=i, distance=dist)
        plot_ordination(physeq, ordi, "samples", color="SampleType")
}, GP1, dist)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1333468 
## Run 1 stress 0.186079 
## Run 2 stress 0.1695636 
## Run 3 stress 0.1701135 
## Run 4 stress 0.1756862 
## Run 5 stress 0.1677043 
## Run 6 stress 0.259691 
## Run 7 stress 0.3416655 
## Run 8 stress 0.1706564 
## Run 9 stress 0.1469145 
## Run 10 stress 0.1530178 
## Run 11 stress 0.1779266 
## Run 12 stress 0.1333468 
## ... Procrustes: rmse 2.045409e-06  max resid 5.310926e-06 
## ... Similar to previous best
## Run 13 stress 0.1527459 
## Run 14 stress 0.1507161 
## Run 15 stress 0.1469145 
## Run 16 stress 0.1582496 
## Run 17 stress 0.1333468 
## ... New best solution
## ... Procrustes: rmse 1.658517e-06  max resid 4.180157e-06 
## ... Similar to previous best
## Run 18 stress 0.1824053 
## Run 19 stress 0.1777852 
## Run 20 stress 0.146027 
## *** Solution reached
names(plist) <- ord_meths
pdataframe = ldply(plist, function(x){
    df = x$data[, 1:2]
    colnames(df) = c("Axis_1", "Axis_2")
    return(cbind(df, x$data))
})
names(pdataframe)[1] = "method"
p = ggplot(pdataframe, aes(Axis_1, Axis_2, color=SampleType, shape=human, fill=SampleType)) + geom_point(size=4) + geom_polygon() + facet_wrap(~method, scales="free") + scale_fill_brewer(type="qual", palette="Set1") + scale_colour_brewer(type="qual", palette="Set1")
p

plist[[2]]

p = plist[[2]] + scale_colour_brewer(type="qual", palette="Set1") + scale_fill_brewer(type="qual", palette="Set1") + geom_point(size=5) + geom_polygon(aes(fill=SampleType))
p

ordu = ordinate(GP1, "PCoA", "unifrac", weighted=TRUE)
plot_ordination(GP1, ordu, color="SampleType", shape="human")

p = plot_ordination(GP1, ordu, color="SampleType", shape="human") + geom_point(size=7, alpha=0.75) + scale_colour_brewer(type="qual", palette="Set1") + ggtitle("MDS/PCoA on weighted-UniFrac distance, GlobalPatterns")
p

Alpha Diversity

pal = "Set1"
scale_colour_discrete <-  function(palname=pal, ...){
  scale_colour_brewer(palette=palname, ...)
}
scale_fill_discrete <-  function(palname=pal, ...){
  scale_fill_brewer(palette=palname, ...)
}
GP <- prune_species(speciesSums(GlobalPatterns) > 0, GlobalPatterns)
## Warning: 'prune_species' is deprecated.
## Use 'prune_taxa' instead.
## See help("Deprecated") and help("phyloseq-deprecated").
## Warning: 'speciesSums' is deprecated.
## Use 'taxa_sums' instead.
## See help("Deprecated") and help("phyloseq-deprecated").
plot_richness(GP)

plot_richness(GP, measures=c("Chao1", "Shannon"))

plot_richness(GP, x = "SampleType", measures=c("Chao1", "Shannon"))

sampleData(GP)$human <- getVariable(GP, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue")
## Warning: 'getVariable' is deprecated.
## Use 'get_variable' instead.
## See help("Deprecated") and help("phyloseq-deprecated").
## Warning: 'sampleData' is deprecated.
## Use 'sample_data' instead.
## See help("Deprecated") and help("phyloseq-deprecated").
## Warning: 'sampleData<-' is deprecated.
## Use 'sample_data<-' instead.
## See help("Deprecated") and help("phyloseq-deprecated").
plot_richness(GP, x="human", color="SampleType", measures=c("Chao1", "Shannon"))

GPst = merge_samples(GP, "SampleType")

sample_data(GPst)$SampleType <- factor(sample_names(GPst))
sample_data(GPst)$human <- as.logical(sample_data(GPst)$human)
p = plot_richness(GPst, x="human", color="SampleType", measures=c("Chao1", "Shannon")) + geom_point(size=5, alpha=0.7)
p

p$layers
## [[1]]
## geom_point: na.rm = TRUE
## stat_identity: na.rm = TRUE
## position_identity 
## 
## [[2]]
## mapping: ymax = ~value + se, ymin = ~value - se 
## geom_errorbar: na.rm = FALSE, orientation = NA, width = 0.1, width = 0.1, flipped_aes = FALSE
## stat_identity: na.rm = FALSE
## position_identity 
## 
## [[3]]
## geom_point: na.rm = FALSE, size = 5, alpha = 0.7
## stat_identity: na.rm = FALSE
## position_identity
p$layers <- p$layers[-1]
p + geom_point(size=5, alpha=0.7)

Heatmaps

gpt <- subset_taxa(GlobalPatterns, Kingdom=="Bacteria")
gpt <- prune_taxa(names(sort(taxa_sums(gpt),TRUE)[1:300]), gpt)
plot_heatmap(gpt, sample.label="SampleType")
## Warning: Transformation introduced infinite values in discrete y-axis

gpac <- subset_taxa(GlobalPatterns, Phylum=="Crenarchaeota")
plot_heatmap(gpac)
## Warning: Transformation introduced infinite values in discrete y-axis

(p <- plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family"))
## Warning: Transformation introduced infinite values in discrete y-axis

p$scales$scales[[1]]$name <- "My X-Axis"
p$scales$scales[[2]]$name <- "My Y-Axis"
print(p)
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#CCFF66")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#FF3300")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#66CCFF")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#66CCFF", high="#000033", na.value="white")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#FFFFCC", high="#000033", na.value="white")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "NMDS", "jaccard")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "DCA", "none", "SampleType", "Family")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "RDA", "none", "SampleType", "Family")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "PCoA", "bray", "SampleType", "Family")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "PCoA", "unifrac", "SampleType", "Family")
## Warning: Transformation introduced infinite values in discrete y-axis

plot_heatmap(gpac, "MDS", "unifrac", "SampleType", "Family", weighted=TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

heatmap(otu_table(gpac))

Network Analysis

data("enterotype")
set.seed(711L)
enterotype = subset_samples(enterotype, !is.na(Enterotype))
plot_net(enterotype, maxdist = 0.4, point_label = "Sample_ID")

plot_net(enterotype, maxdist = 0.3, color = "SeqTech", shape="Enterotype")

ig <- make_network(enterotype, max.dist=0.3)
plot_network(ig, enterotype)

plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)

ig <- make_network(enterotype, max.dist=0.2)
plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)

ig <- make_network(enterotype, dist.fun="bray", max.dist=0.3)
plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)